#Author: Carly Knight
#Analysis with new BA 2006 Data
#4/30/10
#Description:Calcualtes Group-Centered Means, Generates Descriptive Statistics, Relevant Graphs, and Quantities of Interest
#Previous Program: DataADMIN1_42710.SAS
#Following Program: HLM Analysis Models



###INSTALL PACKAGES
install.packages("foreign")
#install.packages("lme4")
#install.packages("multilevel")
install.packages("arm")
install.packages("xtable")
install.packages("faraway")
install.packages("Zelig")

#library(lme4)
library(foreign)
#library(multilevel)
library(VGAM)
library(arm)
library(xtable)
library(faraway)
library(Zelig)




###########################################################################################################
# CALCULATE GROUP-CENTERED VARIABLES/ POVERTY SQUARED/ LOG OF ZIPAREA
###########################################################################################################
setwd("L:/Small Replication/NewModels/data/Datafiles/census_msapmsa")
main <-read.csv("analysisfile_ctract_historical.csv")

ppov2<- main$ppov0**2
log.area <- log(main$area)
main1<- cbind(main, ppov2, log.area)

attach(main1)
out <- zelig(formula=bsntotal ~ pblck0+ ppov0 + ppov2 +  pfor0 + pasian + lpopden+
phisp0+ pdfh0 + centercity+ ipovblk0 + ipovfor0 + tag(1 + ppov0 |areakey), 
offset = log.area, data=main1, family = quasipoisson(link=log), model="ls.mixed")

#CALCULATE GROUP-CENTERED DATASET
xvars<- c("areakey", "pblck0", "ppov0" , "ppov2" ,  "pfor0" , "pasian" , "lpopden", "phisp0", "pdfh0" , "centercity" , "ipovblk0" , "ipovfor0", "chppov07", "chdhous07")
xvars1<-main1[xvars]
xvars.mean<- aggregate(x=xvars1, by=list(xvars1$areakey), mean)
colnames(xvars.mean)<- c("areakey","areakey1", "m_pblck0", "m_ppov0" , "m_ppov2" ,  "m_pfor0" , "m_pasian" , "m_lpopden", "m_phisp0", "m_pdfh0" , "m_centercity" , "m_ipovblk0" , "m_ipovfor0", "m_chppov07", "m_chdhous07")
groupmean <- merge(x=main1, y=xvars.mean, by=c("areakey","areakey"), sort=TRUE)
gvars<- c("m_pblck0", "m_ppov0" , "m_ppov2" ,  "m_pfor0" , "m_pasian" , "m_lpopden", "m_phisp0", "m_pdfh0" , "m_centercity" , "m_ipovblk0" , "m_ipovfor0", "m_chppov07", "m_chdhous07")
gvars1<- groupmean[gvars]
xvars<- c("pblck0", "ppov0" , "ppov2" ,  "pfor0" , "pasian" , "lpopden", "phisp0", "pdfh0" , "centercity" , "ipovblk0" , "ipovfor0", "chppov07", "chdhous07")
xvars2<-main1[xvars]

groupcenter1<- xvars2 - gvars1
colnames(groupcenter1)<- c( "gc_pblck0", "gc_ppov0" , "gc_ppov2" ,  "gc_pfor0" , "gc_pasian" , "gc_lpopden", "gc_phisp0", "gc_pdfh0" , "gc_centercity" , "gc_ipovblk0" , "gc_ipovfor0", "gc_chppov07", "gc_chdhous07")
main2<- cbind(main1, groupcenter1)
attach(main2)


###########################################################################################################
# CODING CONCENTRATION OF POVERTY DUMMIES
###########################################################################################################

#CREATE DUMMIES FOR THREE LEVELS OF POVERTY	
	concpov1<-main2
	segs<- c(0, .10, .25)
	segs1<- c(.10, .25, 1)
	names1<- rep(0, length = length(segs1))
	for(i in 1:length(segs1)){ names1[i] <- paste("ipov", i, sep = "")}
	mat1<- data.frame(matrix(0, ncol = length(names1), nrow = nrow(main2)))
	colnames(mat1) <- names1

	for(i in 1:ncol(mat1)){mat1[,i][concpov1$ppov0>= segs[i]& concpov1$ppov0 < segs1[i]] <-1}
	attach(mat1)

	lowpov<- cbind(main2, mat1)

#CREATE INTERACTIONS
	names2<- rep(0, length = length(segs1))
	for(i in 1:length(segs1)){ names2[i] <- paste("xipov", i, sep = "")}
	mat2<-  data.frame(matrix(0, ncol = length(names1), nrow = nrow(lowpov)))
	colnames(mat2) <- names2
	for(i in 1:ncol(mat2)){mat2[,i] <- mat1[,i]*ppov0}
	povinter<- cbind(lowpov, mat2)

#EXPORT (NOTE: THIS FILE IS THEN USED IN HLM TO RE-ESTIMATE POISSON MODEL WITH VARIOUS DUMMIES
	setwd("L:/Small Replication/NewModels/data/Datafiles/census_msapmsa/")
	write.csv(povinter,file = "analysisfile_ctract_povinter3.csv")

###########################################################################################################
# GRAPHS
###########################################################################################################
attach(main2)
plot(gc_ppov0, bsntotal, MAIN = "Group Centered Poverty and Total Count Establishments")
myline.fit <- lm(bsntotal ~ gc_ppov0)
abline(myline.fit) 

#Plot Mean Function by Percent Pov
roundpov <- round(ppov0, 2)
roundpov2<- cbind(roundpov, bsntotal)		
roundpov3<- aggregate(roundpov2, by = list(roundpov),mean)
roundpov4<- aggregate(roundpov2, by = list(roundpov),median)
plot(roundpov4$roundpov, roundpov4$bsntotal, type = "s", main = "Mean Level of Establishments by Proportion Poverty", xlab = "Proportion Poverty", ylab = "Mean Count of Establishments")

#Plot Mean Function by Group Centered in Pov
	gcroundpov <- round(gc_ppov0, 2)
	gcroundpov2<- cbind(gcroundpov, bsntotal)		
	gcroundpov3<- aggregate(gcroundpov2, by = list(gcroundpov),mean)
	gcroundpov4<- aggregate(gcroundpov2, by = list(gcroundpov),median)
	plot(gcroundpov3$gcroundpov, gcroundpov3$bsntotal)

#PLOT BETA COEFFICIENT AS A FUNCTION OF POVERTY LEVEL DUMMY
	povlevel<- read.csv("L:/Small Replication/NewModels/data/Datafiles/census_msapmsa/povertylevel.csv")
	
	min<- -1.96*povlevel$se + povlevel$beta
	max<- 1.96*povlevel$se + povlevel$beta
	povlevel1<-cbind(povlevel, min, max)
	plot(x= povlevel1$povertylevel, y = povlevel1$beta, type = 'p', ylim= c(-2,4), xlab= "Poverty Level", ylab = "Beta Coefficient", main= "Poverty Effect at Various Levels of Poverty Concentration", xaxp=c(1,3,2))
	segments(x0=povlevel1$povertylevel, y0 = povlevel1$min, x1=povlevel1$povertylevel,y1 = povlevel1$max, col = "red")
	abline(h=0,  lty=2)


#PLOT BETA COEFFICIENT AS A FUNCTION OF VARIOUS THRESHOLDS FOR CONCENTRATED POVERTY LEVEL DUMMY
	#NOTE: THIS GRAPH READS IN THE RESULTS OF INTERATIVE HLM OUTPUTS THAT ADJUST THE THRESHOLD OF POVERTY CONCENTRATION AND FINDS THE EFFECT OF POVERTY INTERACTED WITH THE THRESHOLD
	povc<- read.csv("L:/Small Replication/NewModels/data/Datafiles/census_msapmsa/povcoefs.csv")
	povc1 <- povc[2:length(povc)]
	columns<-c()
	for(i in 1:14){
		temp<- povc1[(3*i-2):(3*i)]
		colnames(temp)<- c("coef", "se", "pvalue")
		columns<- rbind(columns,temp)
		}
	min<- -1.96*columns$se + columns$coef
	max<- 1.96*columns$se + columns$coef
	columns<-cbind(columns, min, max)

	poverty<-c()
	dummypov<-c()
	interact<- c()

	for(i in 1:14){
		temp1<- columns[(3*i-2),]
		temp2<- columns[(3*i-1),]
		temp3<- columns[(3*i),]
	
		poverty<-rbind(poverty, temp1)
		dummypov<- rbind(dummypov, temp2)
		interact<- rbind(interact, temp3)
		}

	poverty1<-cbind(segs,poverty)
	dummypov1<-cbind(segs,dummypov)
	interact1<- cbind(segs, interact)

plot(x= interact1$segs, y = interact1$coef, type = 'p',xlim=c(0,.8), ylim= c(-4,7), xlab= "Poverty Concentration", ylab = "Beta Coefficient", main= "Effect of Poverty at Different Levels of Poverty Concentration")
segments(x0=segs , y0 = interact1$min, x1=segs ,y1 = interact1$max, col = "blue")
abline(h=0,  lty=2)
plot(x= poverty1$segs, y = poverty1$coef, type = 'p')

#PLOT URBANCIITY
urban<- read.csv("L:/Small Replication/NewModels/data/Datafiles/census_msapmsa/urbanicity.csv")
	min<- -1.96*urban$se + urban$beta
	max<- 1.96*urban$se + urban$beta
	urban<-cbind(urban, min, max)


plot(x= urban$type, y = urban$beta, type = 'p', xlab= "Urbanicity", ylab = "Beta Coefficient", main= "Effect of Poverty at Different Levels of Urbanicity", xaxp=c(1,3,2))
segments(x0=urban$type , y0 = urban$min, x1=urban$type ,y1 = urban$max, col = "red")
abline(h=0,  lty=2)

##############
#Descriptives
################

#Correlation between Small and Business Analyst Data
	corrs<- read.csv("L:/Small Replication/NewModels/data/Datafiles/Descriptives/businesscounts.csv")
	cor(corrs$SmallCount2000, corrs$GisCount2006)

#Percentage Census Tracts at Levels of Poverty Concentration/Urbanicity (note: need an indicator for inner center city
sum(main2$urbanarea)/nrow(main2)
sum(main2$centercity)/nrow(main2)

sum(povinter$ipov1)/nrow(povinter)
sum(povinter$ipov2)/nrow(povinter)
sum(povinter$ipov3)/nrow(povinter)

############################
#First Differences For Final Model
###########################
povinter<-read.csv("L:/Small Replication/NewModels/data/Datafiles/census_msapmsa/analysisfile_ctract_povinter3.csv")
attach(povinter)

#Using a negative binomial/nonrandom effects
M2 <- zelig( bsntotal ~ gc_lpopden+ gc_pblck0+ ppov0 + gc_pfor0 + gc_pasian +gc_phisp0+ gc_pdfh0 + log.area + centercity+ centercity:ppov0, model = "negbin", data = povinter)
	M2.lowpov <- setx(M2, ppov0 = 0, centercity=1)
	M2.highpov <- setx(M2, ppov0 = .30, centercity = 1)
	M2.out1 <- sim(M2, x = M2.lowpov, x1 = M2.highpov)	
	summary(M2.out1)
	M2.out1<-sim(M2, x = M2.lowpov)
	M2.out2<-sim(M2, x = M2.highpov)



	M2.lowpov <- setx(M2, gc_ppov0 = 0)
	M2.highpov <- setx(M2, gc_ppov0 = 1)
	M2.out1 <- sim(M2, x = M2.lowpov, x1 = M2.highpov)	
	summary(M2.out1)

#mean function/NOTE: THIS IS FOR INVESTIGATORY PURPOSES ONLY- THIS MODEL DOES NOT ALLOW FOR OVERDISPERSION
M2 <- zelig(formula=bsntotal ~  gc_lpopden+ gc_pblck0+ gc_ppov0 + gc_pfor0 + gc_pasian +gc_phisp0+ gc_pdfh0 + gc_centercity+ gc_centercity:gc_ppov0 + tag(1 + gc_ppov0 |areakey), 
offset = log.area, data=povinter, model="poisson.mixed")

xvars<-c("gc_lpopden", "gc_pblck0", "gc_ppov0", "gc_pfor0" , "gc_pasian" , "gc_phisp0", "gc_pdfh0" , "gc_centercity")
xvars1<- povinter[xvars]
xvarmean<- apply(X=xvars1, MARGIN=2, FUN = mean)
varmean.low["ppov0"]<-0
varmean.low["centercity"]<-1
varmean.high["ppov0"]<-.30
varmean.high["centercity"]<-1

beta.draws<- mvrnorm(1000, mu = 

#mean function/NOTE: THIS IS FOR INVESTIGATORY PURPOSES ONLY- THIS MODEL DOES NOT ALLOW FOR OVERDISPERSION
M21 <- zelig(formula=bsntotal ~  gc_lpopden+ gc_pblck0+ gc_ppov0 + gc_pfor0 + gc_pasian +gc_phisp0+ gc_pdfh0 + log.area + gc_centercity+ gc_centercity:gc_ppov0 + tag(1 + gc_ppov0 |areakey), data=povinter, model="poisson.mixed")


	M2.lowpov <- setx(M2, ppov0 = 0, centercity=1)
	M2.highpov <- setx(M2, ppov0 = .30, centercity = 1)
	M2.out1 <- sim(M2, x = M2.lowpov, x1 = M2.highpov)	
	M2sum<-summary(M2)

	summary(z.out)@coefs,

M2.out <- setx(M2,ppov0 = 0, centercity=1 )
s.out1 <- sim(M2, x = M2.out)
> summary(s.out1)

M3 <- zelig(formula=bsntotal ~  gc_lpopden+ gc_pblck0+ gc_ppov0 + gc_pfor0 + gc_pasian +gc_phisp0+ gc_pdfh0 + conc_pov40 + gc_centercity + tag(1 + gc_ppov0 |areakey), 
offset = log.area, data=povinter, model="poisson.mixed")

	M3.lowpov <- setx(M3, conc_pov40 = 0)
	M3.highpov <- setx(M3,  conc_pov40 = 1)
	M3.out1 <- sim(M3, x = M3.lowpov, x1 = M3.highpov)	
	summary(M3.out1)




############################################################################################################
# Unused Code
##################################################################################################################
concpov <- main2
#coding concentration of poverty
segs<- seq(from = .05, to = .7, by = .05)
names<- rep(0, length = length(segs))
for(i in 1:length(segs)){ names[i] <- paste("ipov", i, sep = "")}
mat<- data.frame(matrix(0, ncol = length(names), nrow = nrow(main2)))
colnames(mat) <- names

for(i in 1:ncol(mat)){mat[,i][concpov$ppov0 > segs[i]] <-1}

main3<- cbind(main2, mat)

names2<- rep(0, length = length(segs))
for(i in 1:length(segs)){ names2[i] <- paste("xipov", i, sep = "")}
mat2<-  data.frame(matrix(0, ncol = length(names), nrow = nrow(main2)))
colnames(mat2) <- names2
for(i in 1:ncol(mat2)){mat2[,i] <- mat[,i]*ppov0}

main4<- cbind(main3, mat2)

setwd("L:/Small Replication/NewModels/data/Datafiles/census_msapmsa/")
write.csv(main4,file = "analysisfile_ctract_povinteract.csv")

#set up loop

outputt2<- list()
outputlist2 <- list()
varmat<- list()

options(expressions = 1000)

for(i in 1:3){
	dummy<- mat[,i]
	interact<- mat2[,i]


	temp<- cbind(main4, dummy, interact)

	out <- zelig(formula=bsntotal ~ gc_pblck0+ gc_ppov0 + dummy + interact +  gc_pfor0 + gc_pasian + gc_lpopden+
	gc_phisp0+ gc_pdfh0 + gc_centercity+ gc_ipovblk0 + gc_ipovfor0 + tag(1 + gc_ppov0 |areakey), 
	offset = log.area, data=temp, model="poisson.mixed")

	#collect outputs
	t2coefs <- coef(out)$areakey[1,]
	t2se<-sqrt(diag(vcov(out)))
	var <- vcov(out)

	x<- cbind(t(t2coefs), t2se)
	colnames(x)<- c("coef", "se")

	outputt2<- cbind(outputt2, x[,1], x[,2])
	outputlist2 <- list(outputlist2, out)
	varmat[[i]] <- var
	rm("temp")
}

